import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# igonre the warning
import os
import warnings
warnings.filterwarnings('ignore')
Sources_df =pd.read_csv(r"D:\2-Project\9-Waterproject\Source.csv")
Sources_df
| SourceID | Source Name | Industry Sector | |
|---|---|---|---|
| 0 | 1 | Residential | - |
| 1 | 2 | Industrial | Manufacturing |
| 2 | 3 | Healthcare | Medical |
| 3 | 4 | Other | Miscellaneous |
Trtplant_df= pd.read_csv(r"D:\2-Project\9-Waterproject\Treatment Plant.csv")
Trtplant_df
| PlantID | name | Location | Capacity | |
|---|---|---|---|---|
| 0 | 1 | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day |
| 1 | 2 | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day |
| 2 | 3 | Sacramento Treatment Plant | Sacramento, CA | 25 million gallons per day |
| 3 | 4 | Los Angeles Treatment Plant | Los Angeles, CA | 150 million gallons per day |
| 4 | 5 | San Diego Treatment Plant | San Diego, CA | 100 million gallons per day |
| 5 | 6 | Phoenix Treatment Plant | Phoenix, AZ | 75 million gallons per day |
| 6 | 7 | Denver Treatment Plant | Denver, CO | 50 million gallons per day |
| 7 | 8 | Minneapolis Treatment Plant | Minneapolis, MN | 25 million gallons per day |
| 8 | 9 | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day |
| 9 | 10 | New York Treatment Plant | New York, NY | 100 million gallons per day |
WstWtrtrt_df = pd.read_csv(r"D:\2-Project\9-Waterproject\WasteWaterTreatmentFact.csv")
WstWtrtrt_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 1/3/2022 | 33 | Pass |
| 1 | 2 | 1 | 2 | 1/3/2022 | 27 | Fail |
| 2 | 3 | 1 | 3 | 1/3/2022 | 28 | Pass |
| 3 | 4 | 2 | 1 | 1/3/2022 | 16 | Pass |
| 4 | 5 | 2 | 2 | 1/3/2022 | 16 | Pass |
| ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2/6/2022 | 44 | Pass |
| 1046 | 1047 | 9 | 3 | 2/6/2022 | 42 | Pass |
| 1047 | 1048 | 10 | 1 | 2/6/2022 | 29 | Pass |
| 1048 | 1049 | 10 | 2 | 2/6/2022 | 29 | Pass |
| 1049 | 1050 | 10 | 3 | 2/6/2022 | 29 | Fail |
1050 rows × 6 columns
Sources_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4 entries, 0 to 3 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SourceID 4 non-null int64 1 Source Name 4 non-null object 2 Industry Sector 4 non-null object dtypes: int64(1), object(2) memory usage: 228.0+ bytes
Trtplant_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10 entries, 0 to 9 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PlantID 10 non-null int64 1 name 10 non-null object 2 Location 10 non-null object 3 Capacity 10 non-null object dtypes: int64(1), object(3) memory usage: 452.0+ bytes
WstWtrtrt_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1050 entries, 0 to 1049 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SampleID 1050 non-null int64 1 PlantID 1050 non-null int64 2 SourceID 1050 non-null int64 3 Date 1050 non-null object 4 Volume of Water Treated 1050 non-null int64 5 Result 1050 non-null object dtypes: int64(4), object(2) memory usage: 49.3+ KB
# checking the null values and duplicate row
Sources_df.isnull().sum()
SourceID 0 Source Name 0 Industry Sector 0 dtype: int64
Trtplant_df.isnull().sum()
PlantID 0 name 0 Location 0 Capacity 0 dtype: int64
WstWtrtrt_df.isnull().sum()
SampleID 0 PlantID 0 SourceID 0 Date 0 Volume of Water Treated 0 Result 0 dtype: int64
# check duplicates values
Sources_df.duplicated().sum()
0
Trtplant_df.duplicated().sum()
0
WstWtrtrt_df.duplicated().sum()
0
# change the data types
Sources_df.dtypes
SourceID int64 Source Name object Industry Sector object dtype: object
Trtplant_df.dtypes
PlantID int64 name object Location object Capacity object dtype: object
WstWtrtrt_df.dtypes
SampleID int64 PlantID int64 SourceID int64 Date object Volume of Water Treated int64 Result object dtype: object
WstWtrtrt_df['Date'] = pd.to_datetime(WstWtrtrt_df['Date'], format='mixed', dayfirst=True)
WstWtrtrt_df.dtypes
SampleID int64 PlantID int64 SourceID int64 Date datetime64[ns] Volume of Water Treated int64 Result object dtype: object
# Deriving new column day name
WstWtrtrt_df['Day Name']=WstWtrtrt_df['Date'].dt.day_name()
WstWtrtrt_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday |
1050 rows × 7 columns
WstWtrtrt_df['Day Name']
0 Tuesday
1 Tuesday
2 Tuesday
3 Tuesday
4 Tuesday
...
1045 Thursday
1046 Thursday
1047 Thursday
1048 Thursday
1049 Thursday
Name: Day Name, Length: 1050, dtype: object
WstWtrtrt_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1050 entries, 0 to 1049 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SampleID 1050 non-null int64 1 PlantID 1050 non-null int64 2 SourceID 1050 non-null int64 3 Date 1050 non-null datetime64[ns] 4 Volume of Water Treated 1050 non-null int64 5 Result 1050 non-null object 6 Day Name 1050 non-null object dtypes: datetime64[ns](1), int64(4), object(2) memory usage: 57.6+ KB
# clean capacity column data
Trtplant_df['Capacity']
0 100 million gallons per day 1 50 million gallons per day 2 25 million gallons per day 3 150 million gallons per day 4 100 million gallons per day 5 75 million gallons per day 6 50 million gallons per day 7 25 million gallons per day 8 150 million gallons per day 9 100 million gallons per day Name: Capacity, dtype: object
Trtplant_df['Capacity'].str.split(' ').str.get(0).astype('int')
0 100 1 50 2 25 3 150 4 100 5 75 6 50 7 25 8 150 9 100 Name: Capacity, dtype: int32
Trtplant_df['Capacity_new'] = Trtplant_df['Capacity'].str.split(' ').str.get(0).astype('int')
Trtplant_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10 entries, 0 to 9 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PlantID 10 non-null int64 1 name 10 non-null object 2 Location 10 non-null object 3 Capacity 10 non-null object 4 Capacity_new 10 non-null int32 dtypes: int32(1), int64(1), object(3) memory usage: 492.0+ bytes
Trtplant_df['Capacity_new']
0 100 1 50 2 25 3 150 4 100 5 75 6 50 7 25 8 150 9 100 Name: Capacity_new, dtype: int32
# Establin g new realtionship between sources_df and trtplant_df ,Trtfact_df
WstWtrtrt_Sources_df = pd.merge(WstWtrtrt_df,Sources_df,on ='SourceID',how='left')
WstWtrtrt_Sources_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical |
1050 rows × 9 columns
# Establish new relationship between trtplant_df & wstwtrt_df
WstWtrtrt_TrtPlant_df= pd.merge(WstWtrtrt_Sources_df,Trtplant_df,on='PlantID',how='left')
WstWtrtrt_TrtPlant_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | name | Location | Capacity | Capacity_new | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
1050 rows × 13 columns
# Plant that is treating maximum and minimum volumne of waste water
WstWtrtrt_TrtPlant_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | name | Location | Capacity | Capacity_new | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
1050 rows × 13 columns
WstWtrtrt_TrtPlant_df_grouped = WstWtrtrt_TrtPlant_df.groupby('name',as_index = False)['Volume of Water Treated'].sum()
WstWtrtrt_TrtPlant_df_grouped
| name | Volume of Water Treated | |
|---|---|---|
| 0 | Chicago Treatment Plant | 4577 |
| 1 | Denver Treatment Plant | 1298 |
| 2 | Los Angeles Treatment Plant | 4983 |
| 3 | Minneapolis Treatment Plant | 677 |
| 4 | New York Treatment Plant | 3145 |
| 5 | Oakland Treatment Plant | 1518 |
| 6 | Phoenix Treatment Plant | 2377 |
| 7 | Sacramento Treatment Plant | 668 |
| 8 | San Diego Treatment Plant | 2810 |
| 9 | San Francisco Treatment Plant | 3133 |
# plot total volume of waste water treated by different treatment plants
fig =px.bar(WstWtrtrt_TrtPlant_df_grouped.sort_values(by='Volume of Water Treated',ascending=False),template='plotly_dark',x='name', y='Volume of Water Treated',color= 'name',text='Volume of Water Treated')
fig.update_layout(xaxis_title='Treatment Plant Name',yaxis_title = 'Volume of Waste Water Treated(million galon)',title=dict(text='Total Volume of waste Water Treated by Different Treatment Plants',x=0.5),width=1000)
# KPI 2 : % Contribution of waste water from different Sources
WstWtrtrt_Sources_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical |
1050 rows × 9 columns
WstWtrtrt_Sources_grouped_df=WstWtrtrt_Sources_df.groupby('Source Name',as_index=False)['Volume of Water Treated'].sum()
WstWtrtrt_Sources_grouped_df
| Source Name | Volume of Water Treated | |
|---|---|---|
| 0 | Healthcare | 8403 |
| 1 | Industrial | 8408 |
| 2 | Residential | 8375 |
fig=px.pie(WstWtrtrt_Sources_grouped_df,names='Source Name',values='Volume of Water Treated',template='plotly_dark',hole=0.5)
fig.update_layout(width=500,title=dict(text='% Contribution of waste water from different Sources',x=0.5))
fig.show()
# KPI 3 : Identify Highly utilized Treatment Plant
# Summation on Volume of Waste Water on the basis of Plant name and Date
WstWtrtrt_TrtPlant_grp_vol_df=WstWtrtrt_TrtPlant_df.groupby(['name','Date'],as_index=False)['Volume of Water Treated'].sum().sort_values(by=['name','Date'])
# Taking mean of capacity_new column on the basis of Plant name and Date
WstWtrtrt_TrtPlant_grp_cap_df=WstWtrtrt_TrtPlant_df.groupby(['name','Date'],as_index=False)['Capacity_new'].mean().sort_values(by=['name','Date'])
# Identifying Utilization of Treatment Plants on Daily basis
WstWtrtrt_TrtPlant_grp_cap_df['Utilization']=WstWtrtrt_TrtPlant_grp_vol_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_grp_cap_df['Capacity_new'] * 100
# Rounding data to 2 places
WstWtrtrt_TrtPlant_grp_cap_df['Utilization']=WstWtrtrt_TrtPlant_grp_cap_df['Utilization'].round(2)
# Identifying average utilization of treatment plant
Avg_utilization_df=WstWtrtrt_TrtPlant_grp_cap_df.groupby('name',as_index=False)['Utilization'].mean().round(2)
# Plotting Utilization of Different Treatment Plants
fig=px.bar(Avg_utilization_df.sort_values(by='Utilization'),x='Utilization',y='name',template='plotly_dark',color='Utilization',text='Utilization')
fig.update_layout(title=dict(text='Utilization of Treatment Plants',x=0.5),width=1000)
fig.show()
# KPI 4 Identifying Highly Efficient treatment plant
WstWtrtrt_TrtPlant_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | name | Location | Capacity | Capacity_new | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
1050 rows × 13 columns
succesful_Treated_df=WstWtrtrt_TrtPlant_df[WstWtrtrt_TrtPlant_df['Result']=='Pass'].groupby('name',as_index=False)['Volume of Water Treated'].sum().sort_values(by='name')
succesful_Treated_df
| name | Volume of Water Treated | |
|---|---|---|
| 0 | Chicago Treatment Plant | 4110 |
| 1 | Denver Treatment Plant | 1221 |
| 2 | Los Angeles Treatment Plant | 4366 |
| 3 | Minneapolis Treatment Plant | 585 |
| 4 | New York Treatment Plant | 2792 |
| 5 | Oakland Treatment Plant | 1238 |
| 6 | Phoenix Treatment Plant | 2129 |
| 7 | Sacramento Treatment Plant | 543 |
| 8 | San Diego Treatment Plant | 2530 |
| 9 | San Francisco Treatment Plant | 2596 |
WstWtrtrt_TrtPlant_df_grouped = WstWtrtrt_TrtPlant_df_grouped.sort_values(by='name')
WstWtrtrt_TrtPlant_df_grouped
| name | Volume of Water Treated | |
|---|---|---|
| 0 | Chicago Treatment Plant | 4577 |
| 1 | Denver Treatment Plant | 1298 |
| 2 | Los Angeles Treatment Plant | 4983 |
| 3 | Minneapolis Treatment Plant | 677 |
| 4 | New York Treatment Plant | 3145 |
| 5 | Oakland Treatment Plant | 1518 |
| 6 | Phoenix Treatment Plant | 2377 |
| 7 | Sacramento Treatment Plant | 668 |
| 8 | San Diego Treatment Plant | 2810 |
| 9 | San Francisco Treatment Plant | 3133 |
succesful_Treated_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_df_grouped['Volume of Water Treated']
0 0.897968 1 0.940678 2 0.876179 3 0.864106 4 0.887758 5 0.815547 6 0.895667 7 0.812874 8 0.900356 9 0.828599 Name: Volume of Water Treated, dtype: float64
succesful_Treated_df['Efficiency']=succesful_Treated_df['Volume of Water Treated']/WstWtrtrt_TrtPlant_df_grouped['Volume of Water Treated']*100
succesful_Treated_df['Efficiency']=succesful_Treated_df['Efficiency'].round(2)
succesful_Treated_df
| name | Volume of Water Treated | Efficiency | |
|---|---|---|---|
| 0 | Chicago Treatment Plant | 4110 | 89.80 |
| 1 | Denver Treatment Plant | 1221 | 94.07 |
| 2 | Los Angeles Treatment Plant | 4366 | 87.62 |
| 3 | Minneapolis Treatment Plant | 585 | 86.41 |
| 4 | New York Treatment Plant | 2792 | 88.78 |
| 5 | Oakland Treatment Plant | 1238 | 81.55 |
| 6 | Phoenix Treatment Plant | 2129 | 89.57 |
| 7 | Sacramento Treatment Plant | 543 | 81.29 |
| 8 | San Diego Treatment Plant | 2530 | 90.04 |
| 9 | San Francisco Treatment Plant | 2596 | 82.86 |
# Ploting effieciency of treatment plant
fig = px.bar(succesful_Treated_df.sort_values(by='Efficiency'),x='Efficiency',y='name',template='plotly_dark',color='Efficiency',text='Efficiency')
fig.update_layout(width=1000,title=dict(text='Efficiency of treatment plant' ,x=0.5))
# KPI 5 : Identify Day Wise Activity of Treatment Plant
WstWtrtrt_TrtPlant_df
| SampleID | PlantID | SourceID | Date | Volume of Water Treated | Result | Day Name | Source Name | Industry Sector | name | Location | Capacity | Capacity_new | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2022-03-01 | 33 | Pass | Tuesday | Residential | - | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 1 | 2 | 1 | 2 | 2022-03-01 | 27 | Fail | Tuesday | Industrial | Manufacturing | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 2 | 3 | 1 | 3 | 2022-03-01 | 28 | Pass | Tuesday | Healthcare | Medical | San Francisco Treatment Plant | San Francisco, CA | 100 million gallons per day | 100 |
| 3 | 4 | 2 | 1 | 2022-03-01 | 16 | Pass | Tuesday | Residential | - | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| 4 | 5 | 2 | 2 | 2022-03-01 | 16 | Pass | Tuesday | Industrial | Manufacturing | Oakland Treatment Plant | Oakland, CA | 50 million gallons per day | 50 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1045 | 1046 | 9 | 2 | 2022-06-02 | 44 | Pass | Thursday | Industrial | Manufacturing | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1046 | 1047 | 9 | 3 | 2022-06-02 | 42 | Pass | Thursday | Healthcare | Medical | Chicago Treatment Plant | Chicago, IL | 150 million gallons per day | 150 |
| 1047 | 1048 | 10 | 1 | 2022-06-02 | 29 | Pass | Thursday | Residential | - | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1048 | 1049 | 10 | 2 | 2022-06-02 | 29 | Pass | Thursday | Industrial | Manufacturing | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
| 1049 | 1050 | 10 | 3 | 2022-06-02 | 29 | Fail | Thursday | Healthcare | Medical | New York Treatment Plant | New York, NY | 100 million gallons per day | 100 |
1050 rows × 13 columns
Dayname_wise_activty = WstWtrtrt_TrtPlant_df.groupby(['name','Day Name'],as_index = False)['Volume of Water Treated'].sum()
Dayname_wise_activty
| name | Day Name | Volume of Water Treated | |
|---|---|---|---|
| 0 | Chicago Treatment Plant | Friday | 660 |
| 1 | Chicago Treatment Plant | Monday | 639 |
| 2 | Chicago Treatment Plant | Saturday | 652 |
| 3 | Chicago Treatment Plant | Sunday | 657 |
| 4 | Chicago Treatment Plant | Thursday | 781 |
| ... | ... | ... | ... |
| 65 | San Francisco Treatment Plant | Saturday | 437 |
| 66 | San Francisco Treatment Plant | Sunday | 451 |
| 67 | San Francisco Treatment Plant | Thursday | 534 |
| 68 | San Francisco Treatment Plant | Tuesday | 361 |
| 69 | San Francisco Treatment Plant | Wednesday | 458 |
70 rows × 3 columns
fig = px.line(Dayname_wise_activty,x='name',y='Volume of Water Treated',color = 'Day Name',template='plotly_dark')
fig.update_layout(width=900,title=dict(text='Day wise Activity of Treatment Plant',x = 0.5))